1 Introduction

Here, we will reanalyse the 3 cases of CLL that we published in our previous study to check:

  1. Whether we can identify a similar trajectory as we identified previously (CXCR4/CD27/MIR155HG).
  2. Whether these cells show an upregulated OXPHOS or downregulated BCR signaling.

This will give us a negative control to ensure our observations are specific or not to Richter transformation.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(SeuratWrappers)
library(harmony)
library(ggpubr)
library(clusterProfiler)
library(org.Hs.eg.db)
library(tidyverse)

2.2 Define parameters

path_to_obj <- here::here("results/R_objects/cll_seurat_annotated.rds")
path_to_save <- here::here("results/R_objects/cll_seurat_annotated_list.rds")


# Thresholds
max_pct_mt <- 22.5
min_n_features <- 600


# Source functions
source(here::here("bin/utils.R"))

2.3 Load data

seurat <- readRDS(path_to_obj)
colnames(seurat@meta.data)[colnames(seurat@meta.data) == "cell_type"] <- "annotation"
DimPlot(seurat, group.by = "annotation")

3 Filter data and reprocess

# Exclude microenvironment
selected_cells_1 <- seurat$annotation %in% c("CLL 1892", "CLL 1472", "CLL 1220")
seurat <- subset(seurat, cells = colnames(seurat)[selected_cells_1])
DimPlot(seurat)

# Keep only cells stored at RT or 4C for <= 2h
selected_cells_2 <- 
  (seurat$time %in% c("0h", "2h") & seurat$temperature == "RT") |
  (seurat$temperature == "4C")
seurat <- subset(seurat, cells = colnames(seurat)[selected_cells_2])
DimPlot(seurat, group.by  = "temperature")

table(seurat$temperature, seurat$time)
##     
##        0h   2h   4h   6h   8h  24h
##   4C 3863 1264 1274  996 1507 1387
##   RT 3893 3502    0    0    0    0
# Apply more stringent thresholds (% mitochondrial expression)
FeatureScatter(seurat, "nFeature_RNA", "percent_mt")

hist(seurat$percent_mt, 100)

seurat <- subset(
  seurat,
  subset = percent_mt < max_pct_mt & nFeature_RNA > min_n_features
)


# Split by donor and reprocess
seurat_list <- SplitObject(seurat, split.by = "donor")
donors <- c("1220", "1472", "1892")
seurat_list <- seurat_list[donors]
seurat_list <- purrr::map(donors, function(x) {
  seurat_obj <- seurat_list[[x]]
  seurat_obj <- seurat_obj %>%
    NormalizeData() %>%
    FindVariableFeatures() %>%
    ScaleData() %>%
    RunPCA()
  if (x == "1220") {
    seurat_obj <- seurat_obj %>%
      RunUMAP(reduction = "pca", dims = 1:20)
  } else if (x %in% c("1472", "1892")) {
    seurat_obj <- seurat_obj %>%
      RunHarmony(
        reduction = "pca",
        dims = 1:20,
        group.by.vars = "temperature"
      ) %>% 
      RunUMAP(reduction = "harmony", dims = 1:20)    
  }
  seurat_obj
})
names(seurat_list) <- donors
feat_plots_1 <- purrr::map(seurat_list, function(seurat_obj) {
  ps <- purrr::map(c("CXCR4", "CD27", "MIR155HG"), function(x) {
    p <- FeaturePlot(seurat_obj, features = x, pt.size = 0.65) +
      scale_color_viridis_c(option = "magma")
  })
  ggarrange(plotlist = ps, ncol = 3, common.legend = TRUE)
})
feat_plots_1
## $`1220`

## 
## $`1472`

## 
## $`1892`

3.1 Subcluster

Overall, we can see how the main driver of variance is again the exposure to the lymph node microenvironment lymph. In the 3 donors, we see 3 main regions, with mutually exclusive expression of CXCR4, CD27 and MIR155HG.

In addition, we see that for 1892 we can identify a minor subclon:

resolutions <- c(0.2, 0.2)
seurat_list[c("1472", "1892")] <- purrr::map2(
  seurat_list[c("1472", "1892")], 
  resolutions,
  function(seurat_obj, res) {
    seurat_obj <- FindNeighbors(
      seurat_obj,
      reduction = "harmony",
      dims = 1:20
    )
    seurat_obj <- FindClusters(seurat_obj, resolution = res)
    seurat_obj
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 4975
## Number of edges: 138155
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8073
## Number of communities: 2
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 9116
## Number of edges: 254574
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8644
## Number of communities: 5
## Elapsed time: 1 seconds
DimPlot(seurat_list$`1472`)

DimPlot(seurat_list$`1892`)

clusters_of_interest <- list(
  "1472" = c("1"),
  "1892" = c("3", "4")
)


# Markers
markers_all <- purrr::map(
  seurat_list[c("1472", "1892")],
  FindAllMarkers,
  only.pos = FALSE,
  logfc.threshold = 0.1
)
markers_all <- purrr::map(names(markers_all), function(x) {
  df <- markers_all[[x]]
  df <- df %>% 
    dplyr::filter(cluster %in% clusters_of_interest[[x]], p_val_adj < 0.001) %>%
    group_by(cluster) %>%
    dplyr::arrange(desc(avg_log2FC), .by_group = TRUE) %>%
    ungroup()
  df
})
names(markers_all) <- names(clusters_of_interest)
DT::datatable(markers_all$`1472`)
DT::datatable(markers_all$`1892`)

3.2 Plot markers of interest

markers_of_interest <- c("PCDH9", "TTN", "FCRL5", "BCL11A", "ATM", "CHD2",
                         "OGA", "OGT", "PAX5", "DDX17", "BACH2", "BCL2",
                         "BIRC3", "IL4R", "KMT2A", "GLUL", "S100A10")
markers_of_interest_gg <- purrr::map(markers_of_interest, function(x) {
  ps <- purrr::map(names(seurat_list), function(donor) {
    p <- FeaturePlot(seurat_list[[donor]], features = x, pt.size = 0.6) +
      scale_color_viridis_c(option = "inferno") +
      ggtitle(str_c(donor, x, sep = "_")) +
      theme(plot.title = element_text(size = 13, hjust = 0.5))
  })
  out <- ggarrange(plotlist = ps, ncol = 3, common.legend = TRUE)
})
names(markers_of_interest_gg) <- markers_of_interest
markers_of_interest_gg
## $PCDH9

## 
## $TTN

## 
## $FCRL5

## 
## $BCL11A

## 
## $ATM

## 
## $CHD2

## 
## $OGA

## 
## $OGT

## 
## $PAX5

## 
## $DDX17

## 
## $BACH2

## 
## $BCL2

## 
## $BIRC3

## 
## $IL4R

## 
## $KMT2A

## 
## $GLUL

## 
## $S100A10

4 Gene Set Enrichment Analysis (GSEA)

To shed light into the identity of these novel clusters, let us perform a GSEA with clusterProfiler.

gene_lists <- list(
  "1472" = markers_all$`1472`[markers_all$`1472`$cluster == "1", "avg_log2FC", drop = TRUE],
  "1892" = markers_all$`1892`[markers_all$`1892`$cluster == "4", "avg_log2FC", drop = TRUE]
)
names(gene_lists$`1472`) <- markers_all$`1472`$gene[markers_all$`1472`$cluster == "1"]
names(gene_lists$`1892`) <- markers_all$`1892`$gene[markers_all$`1892`$cluster == "4"]
gsea_pcdh9_clusters <- purrr::map(gene_lists, function(x) {
  gsea_results <- gseGO(
    x,
    ont = "BP",
    OrgDb = org.Hs.eg.db,
    keyType = "SYMBOL",
    minGSSize = 30,
    maxGSSize = 300,
    seed = TRUE
  )
  gsea_results <- clusterProfiler::simplify(gsea_results, cutoff = 0.35)
  gsea_results@result <- gsea_results@result %>%
    arrange(desc(NES))
  gsea_results
})
DT::datatable(gsea_pcdh9_clusters$`1472`@result)
DT::datatable(gsea_pcdh9_clusters$`1892`@result)
oxphos_gg <- purrr::map(gsea_pcdh9_clusters, function(obj) {
  p <- gseaplot(
    obj,
    geneSetID = "GO:0006119",
    by = "runningScore",
    title = "oxidative phosphorylation"
  )
  p
})
oxphos_gg
## $`1472`

## 
## $`1892`

bcr_gg <- purrr::map(gsea_pcdh9_clusters, function(obj) {
  p <- gseaplot(
    obj,
    geneSetID = "GO:0050853",
    by = "runningScore",
    title = "BCR signaling"
  )
  p
})
bcr_gg
## $`1472`

## 
## $`1892`

5 OXPHOS and BCR signaling signatures

signatures <- list(
  b_cell_signaling = gsea_pcdh9_clusters$`1892`@geneSets$`GO:0050853`,
  oxphos = gsea_pcdh9_clusters$`1892`@geneSets$`GO:0006119`,
  mitochondrial_translation = gsea_pcdh9_clusters$`1892`@geneSets$`GO:0032543`
)
seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
  seurat_obj <- AddModuleScore(
    seurat_obj,
    features = signatures,
    name = names(signatures)
  )
  seurat_obj
})
signatures_pots <- purrr::map(
  c("b_cell_signaling1", "oxphos2", "mitochondrial_translation3"),
  function(x) {
    p1 <- FeaturePlot(seurat_list$`1220`, x, pt.size = 0.7) +
      scale_color_viridis_c(option = "magma")
    p2 <- FeaturePlot(seurat_list$`1472`, x, pt.size = 0.7) +
      scale_color_viridis_c(option = "magma")
    p3 <- FeaturePlot(seurat_list$`1892`, x, pt.size = 0.7) +
      scale_color_viridis_c(option = "magma")
    out <- ggarrange(
      plotlist = list(p1, p2, p3),
      ncol = 3,
      common.legend = TRUE
    )
    out
})
signatures_pots
## [[1]]

## 
## [[2]]

## 
## [[3]]

6 Save

saveRDS(seurat_list, path_to_save)

7 Session Information

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1          stringr_1.4.0          dplyr_1.0.6            purrr_0.3.4            readr_1.4.0            tidyr_1.1.3            tibble_3.1.2           tidyverse_1.3.1        org.Hs.eg.db_3.12.0    AnnotationDbi_1.52.0   IRanges_2.24.1         S4Vectors_0.28.1       Biobase_2.50.0         BiocGenerics_0.36.1    clusterProfiler_3.18.1 ggpubr_0.4.0           ggplot2_3.3.3          harmony_1.0            Rcpp_1.0.6             SeuratWrappers_0.3.0   SeuratObject_4.0.2     Seurat_4.0.3           BiocStyle_2.18.1      
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1            reticulate_1.20       tidyselect_1.1.1      RSQLite_2.2.7         htmlwidgets_1.5.3     grid_4.0.4            BiocParallel_1.24.1   Rtsne_0.15            scatterpie_0.1.6      munsell_0.5.0         codetools_0.2-18      ica_1.0-2             DT_0.18               future_1.21.0         miniUI_0.1.1.1        withr_2.4.2           colorspace_2.0-1      GOSemSim_2.16.1       highr_0.9             knitr_1.33            rstudioapi_0.13       ROCR_1.0-11           ggsignif_0.6.2        tensor_1.5            DOSE_3.16.0           listenv_0.8.0         labeling_0.4.2        polyclip_1.10-0       bit64_4.0.5           farver_2.1.0          rprojroot_2.0.2       downloader_0.4        parallelly_1.26.0     vctrs_0.3.8           generics_0.1.0        xfun_0.23             R6_2.5.0              graphlayouts_0.7.1    rsvd_1.0.5            spatstat.utils_2.2-0  cachem_1.0.5          fgsea_1.16.0          assertthat_0.2.1      promises_1.2.0.1      scales_1.1.1          ggraph_2.0.5          enrichplot_1.10.2     gtable_0.3.0          globals_0.14.0        goftest_1.2-2         tidygraph_1.2.0       rlang_0.4.11          splines_4.0.4         rstatix_0.7.0        
##  [55] lazyeval_0.2.2        spatstat.geom_2.1-0   broom_0.7.7           modelr_0.1.8          BiocManager_1.30.15   yaml_2.2.1            reshape2_1.4.4        abind_1.4-5           crosstalk_1.1.1       backports_1.2.1       httpuv_1.6.1          qvalue_2.22.0         tools_4.0.4           bookdown_0.22         ellipsis_0.3.2        spatstat.core_2.1-2   jquerylib_0.1.4       RColorBrewer_1.1-2    ggridges_0.5.3        plyr_1.8.6            rpart_4.1-15          deldir_0.2-10         viridis_0.6.1         pbapply_1.4-3         cowplot_1.1.1         zoo_1.8-9             haven_2.4.1           ggrepel_0.9.1         cluster_2.1.1         here_1.0.1            fs_1.5.0              magrittr_2.0.1        RSpectra_0.16-0       data.table_1.14.0     scattermore_0.7       DO.db_2.9             openxlsx_4.2.3        reprex_2.0.0          lmtest_0.9-38         RANN_2.6.1            fitdistrplus_1.1-5    matrixStats_0.59.0    hms_1.1.0             patchwork_1.1.1       mime_0.10             evaluate_0.14         xtable_1.8-4          rio_0.5.26            readxl_1.3.1          gridExtra_2.3         compiler_4.0.4        shadowtext_0.0.8      KernSmooth_2.23-18    crayon_1.4.1         
## [109] htmltools_0.5.1.1     mgcv_1.8-36           later_1.2.0           lubridate_1.7.10      DBI_1.1.1             tweenr_1.0.2          dbplyr_2.1.1          MASS_7.3-53.1         Matrix_1.3-4          car_3.0-10            cli_2.5.0             igraph_1.2.6          pkgconfig_2.0.3       rvcheck_0.1.8         foreign_0.8-81        plotly_4.9.4          spatstat.sparse_2.0-0 xml2_1.3.2            bslib_0.2.5.1         rvest_1.0.0           digest_0.6.27         sctransform_0.3.2     RcppAnnoy_0.0.18      spatstat.data_2.1-0   rmarkdown_2.8         cellranger_1.1.0      leiden_0.3.8          fastmatch_1.1-0       uwot_0.1.10           curl_4.3.1            shiny_1.6.0           lifecycle_1.0.0       nlme_3.1-152          jsonlite_1.7.2        carData_3.0-4         limma_3.46.0          viridisLite_0.4.0     fansi_0.5.0           pillar_1.6.1          lattice_0.20-41       fastmap_1.1.0         httr_1.4.2            survival_3.2-10       GO.db_3.12.1          glue_1.4.2            remotes_2.4.0         zip_2.2.0             png_0.1-7             bit_4.0.4             ggforce_0.3.3         stringi_1.6.2         sass_0.4.0            blob_1.2.1            memoise_2.0.0        
## [163] irlba_2.3.3           future.apply_1.7.0